* Reset settings and initialize log file
* NOTE(review): launch is not defined in this file; it is presumably a project
* command paired with the unlaunch call at the end -- confirm what path() controls.
launch, path("share/explain_demogs")

*-------------------------------------------------------------------------------
* Price and Wasserman (2024), "The Summer Drop in Female Employment"
*
* Description: Explain demographic heterogeneity in the summer drop in EPOP.
*-------------------------------------------------------------------------------


if "$estimate" != "0" {
	* Estimation stage. It runs unless the global macro estimate is exactly 0
	* (an unset global also triggers estimation). Each model is saved to its own
	* .ster file so the table code further down can be rerun without re-estimating.

	* Load data on adult individuals
	gzuse "$basepath/data/derived/cps_bms_sample.dta.gz", clear

	* Restrict to variables we need
	keep pid tm year month mish wtfinl linked_complete female age_bin wbho educ emp spouse_present youngest school

	* Focus on women
	keep if female == 1

	* Tag individuals observed throughout MIS 1-4, now observed in MIS 5-8, and linkable throughout
	* (the fourth record within pid, ordered by tm, must be MIS 4)
	bysort pid (tm): gen byte year_two = (mish[4] == 4 & inrange(mish, 5, 8) & linked_complete == 1)

	* Tag individuals connected to education in the first year
	* NOTE(review): Stata treats a missing value as true in a logical expression, so a
	* missing school in any of the first four records sets ever_ed to 1 -- confirm that
	* school is never missing for the year_two sample, where ever_ed is used.
	bysort pid (tm): gen byte ever_ed = (school[1] | school[2] | school[3] | school[4])

	* Restrict to May and July (done after the tags above, which need all months)
	keep if inlist(month, 5, 7)

	* Indicator for summer
	gen byte july = (month == 7)

	* Express EPOP in percent terms
	replace emp = 100 * emp

	* Shorten names (youngest already has its short name, so it is not renamed)
	rename age_bin age
	rename spouse_present married

	* Specify covariates
	* Every group is fully interacted with july; educ uses level 2 as its base
	local july "i.july"
	local demogs "i.july##i.age i.july##i.wbho i.july##ib2.educ"
	local household "i.july##i.married##i.youngest"
	local school "i.july##i.ever_ed"

	* Specify regression elements
	local wt "[aw = wtfinl]"
	local vce "vce(cluster pid)"

	* Examine heterogeneity: full sample
	quietly eststo m1: reg emp `demogs' `wt', `vce'
	estimates save "$basepath/models/explain_demogs/m1.ster", replace

	quietly eststo m2: reg emp `demogs' `household' `wt', `vce'
	estimates save "$basepath/models/explain_demogs/m2.ster", replace

	* Examine heterogeneity: continuous sample
	quietly eststo m3: reg emp `demogs' if year_two == 1 `wt', `vce'
	estimates save "$basepath/models/explain_demogs/m3.ster", replace

	quietly eststo m4: reg emp `demogs' `household' if year_two == 1 `wt', `vce'
	estimates save "$basepath/models/explain_demogs/m4.ster", replace

	quietly eststo m5: reg emp `demogs' `household' `school' if year_two == 1 `wt', `vce'
	estimates save "$basepath/models/explain_demogs/m5.ster", replace

	* Examine heterogeneity: non-education sample
	quietly eststo m6: reg emp `demogs' if year_two == 1 & ever_ed == 0 `wt', `vce'
	estimates save "$basepath/models/explain_demogs/m6.ster", replace

	quietly eststo m7: reg emp `demogs' `household' if year_two == 1 & ever_ed == 0 `wt', `vce'
	estimates save "$basepath/models/explain_demogs/m7.ster", replace
}


* Create a table showing point estimates and standard errors
*-------------------------------------------------------------------------------

* Turn the estimates into a dataset: one record per covariate level and model,
* holding the july interaction coefficient (pbeta) and its standard error (pstde)
tempfile coefs
capture postclose coefs
postfile coefs byte k str10 group str10 covariate byte model float pbeta float pstde using `coefs'

* Interacted factors in table order, with the covariate tag of each level listed
* in level order (first tag is level 1, second is level 2, and so on)
local factors   "age wbho educ"
local tags_age  "age2529 age3034 age3539 age4044 age4549"
local tags_wbho "white black latin other"
local tags_educ "lhs hsg smc clg"

forvalues m = 1/7 {
	* Load estimates
	estimates use "$basepath/models/explain_demogs/m`m'.ster"

	* Sample size and R-squared, kept as formatted strings for the table footer
	local N`m' = string(e(N), "%12.0gc")
	local R`m' = string(`=e(r2)', "%9.3f")

	* Coefficients and standard errors; seq numbers the table rows 1 through 13
	local seq = 0
	foreach f of local factors {
		local lvl = 0
		foreach tag of local tags_`f' {
			local ++seq
			local ++lvl
			post coefs (`seq') ("`f'") ("`tag'") (`m') (_b[1.july#`lvl'.`f']) (_se[1.july#`lvl'.`f'])
		}
	}
}

postclose coefs
use `coefs', clear

* Reshape the estimates: stack beta and stde as separate records, then spread
* the seven models across columns p1-p7; beta sorts ahead of stde within each k
reshape long p, i(k group covariate model) j(parameter) string
reshape wide p, i(k group covariate parameter) j(model)
sort k parameter

* Format the estimates as strings: two decimals, standard errors in parentheses,
* and a dash for both the estimate and the standard error of omitted groups
forvalues m = 1/7 {
	generate fmt = cond(inlist(covariate, "age2529", "white", "hsg"), "--", ///
		cond(parameter == "stde", "(" + string(p`m', "%9.2f") + ")", string(p`m', "%9.2f")))
	drop p`m'
	rename fmt p`m'
}

* Write the header: font size, column layout, and the row of model numbers
* (no line break is written after the font-size command, as before)
capture file close demogs
file open demogs using "$basepath/output/explain_demogs.tex", write replace
file write demogs "\footnotesize" "\begin{tabularx}{7in}{p{2.25in}rrrrrrr}" _n ///
	"\toprule" _n ///
	"& (1) & (2) & (3) & (4) & (5) & (6) & (7) \\" _n ///
	"\midrule" _n

* Prepare the numerical cells
* Locals row1-row26 each hold the seven model cells of one dataset record,
* every cell preceded by an ampersand so it can follow a row label directly
forvalues r = 1/26 {
	local row`r'
	forvalues k = 1/7 {
		local row`r' `"`row`r'' & `=p`k'[`r']'"'
	}
}

* Indentation placed in front of each row label
local hs   "\hspace{1em}"

* Footer cells: addN and addR collect the per-model observation counts and
* R-squared strings stored when the estimates were loaded
foreach g in "N" "R" {
	* Start from an empty accumulator, as is done for the row locals above
	local add`g'
	forvalues k = 1/7 {
		local add`g' "`add`g'' & ``g'`k''"
	}
}

* Write the rows
* Each covariate takes two lines: the labelled line carries the point estimates
* (odd-numbered row locals) and the unlabelled line beneath it carries the
* standard errors (even-numbered row locals). Omitted groups show dashes.
file write demogs "\multicolumn{8}{l}{\textbf{Age}} \\[0.5em]" _n
file write demogs "`hs' 25--29 (omitted)                                             `row1'  \\" _n
file write demogs "`hs'                                                              `row2'  \\[0.5em]" _n
file write demogs "`hs' 30--34                                                       `row3'  \\" _n
file write demogs "`hs'                                                              `row4'  \\[0.5em]" _n
file write demogs "`hs' 35--39                                                       `row5'  \\" _n
file write demogs "`hs'                                                              `row6'  \\[0.5em]" _n
file write demogs "`hs' 40--44                                                       `row7'  \\" _n
file write demogs "`hs'                                                              `row8'  \\[0.5em]" _n
file write demogs "`hs' 45--49                                                       `row9'  \\" _n
file write demogs "`hs'                                                              `row10' \\[1em]" _n
file write demogs "\multicolumn{8}{l}{\textbf{Race and ethnicity}} \\[0.5em]" _n
file write demogs "`hs' White non-Hispanic (omitted)                                 `row11'  \\" _n
file write demogs "`hs'                                                              `row12' \\[0.5em]" _n
file write demogs "`hs' Black non-Hispanic                                           `row13'  \\" _n
file write demogs "`hs'                                                              `row14' \\[0.5em]" _n
file write demogs "`hs' Hispanic or Latinx                                           `row15'  \\" _n
file write demogs "`hs'                                                              `row16' \\[0.5em]" _n
file write demogs "`hs' Other non-Hispanic                                           `row17'  \\" _n
file write demogs "`hs'                                                              `row18' \\[1em]" _n
file write demogs "\multicolumn{8}{l}{\textbf{Educational attainment}} \\[0.5em]" _n
file write demogs "`hs' Less than high school                                        `row19' \\" _n
file write demogs "`hs'                                                              `row20' \\[0.5em]" _n
file write demogs "`hs' High school graduate (omitted)                               `row21' \\" _n
file write demogs "`hs'                                                              `row22' \\[0.5em]" _n
file write demogs "`hs' Some college                                                 `row23' \\" _n
file write demogs "`hs'                                                              `row24' \\[0.5em]" _n
file write demogs "`hs' College graduate                                             `row25' \\" _n
file write demogs "`hs'                                                              `row26' \\[0.5em]" _n
file write demogs "\midrule" _n
* Specification grid: the X marks are typed by hand and must be kept in step with
* the seven model definitions (models 3-7 restrict to year_two, models 6-7 also to
* ever_ed equal to 0, models 2, 4, 5 and 7 add the household terms, and model 5
* adds the ever_ed interaction)
file write demogs "\multicolumn{8}{l}{Sample restrictions:} \\" _n
file write demogs "`hs' Observed throughout first year                               &   &   & X & X & X & X & X \\" _n
file write demogs "`hs' Unconnected to ed. in first year                             &   &   &   &   &   & X & X \\[0.5em]" _n
file write demogs "\multicolumn{8}{l}{Controls for:} \\" _n
file write demogs "`hs' Household structure                                          &   & X &   & X & X &   & X \\" _n
file write demogs "`hs' Connection to ed. in first year                              &   &   &   &   & X &   &   \\[0.5em]" _n
* Footer: per-model observation counts and R-squared values
file write demogs "Number of observations                                            `addN' \\" _n
file write demogs "R$^2$                                                             `addR' \\" _n

* Close out the table
file write demogs "\bottomrule" _n
file write demogs "\end{tabularx}"
file close demogs

* Close the log file
* NOTE(review): unlaunch is not defined in this file; presumably the counterpart
* of the launch call at the top -- confirm.
unlaunch
